# -*- coding: utf-8 -*-
# @Time :  18:44
# @Author: blackshark
#@File :党委学生部.py
#@SoftWare : PyCharm

import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Site root used to build absolute article URLs.
url = "http://xsc.scu.edu.cn"

# News-list (index) pages that are crawled for article links.
pageUrl = [
    "http://xsc.scu.edu.cn/Website/XG/Home/NewsList?APvRSjfI7vuqRRz5liqbWPW-RxC_gMd3UW_6wEVYUMg=.shtml",
    "http://xsc.scu.edu.cn/Website/XG/Home/NewsList?dRdvMU83d9zmFAhKvSPId2BzVaZ8aaV8xb_yUvYG6S4=.shtml",
]

k = 0  # not used by the code visible in this file
picNum = 0  # only referenced by the commented-out image code further down

# Accumulators filled by the crawl loops below.
subUrl = []  # URLs of the individual article pages
title = []  # per article: text of the first <p> in the title block
information = []  # per article: text of the <p class="author"> line
passage = []  # per article: text of the body container
imgUrl = []  # flat list of image sources across all articles
pagePicNum = []  # per article: how many entries it added to imgUrl

# Crawl every listing page and collect the absolute URL of each article it
# links to into subUrl.
for listing_url in pageUrl:
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(listing_url, timeout=10)
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    for news_list in soup.find_all('div', class_='news-list'):
        # Read hrefs from the parsed <a> tags instead of regex-matching the
        # re-serialized HTML: a regex can run across several attributes or
        # tags and sees entity-escaped text (e.g. "&amp;") rather than the
        # real URL.
        for anchor in news_list.find_all('a', href=True):
            # Keep only links that contain "shtml", cut just after its first
            # occurrence (same rule the previous regex applied).
            head, marker, _ = anchor['href'].partition("shtml")
            if marker:
                # urljoin gives the same result as plain concatenation for
                # root-relative paths and also copes with absolute hrefs.
                subUrl.append(urljoin(url, head + marker))


# Download every article and record its title, author line, body text and
# image sources. title / information / passage / pagePicNum each receive
# exactly one entry per URL in subUrl, so they stay index-aligned with it.
for article_url in subUrl:
    # A timeout keeps an unresponsive server from hanging the script forever.
    response = requests.get(article_url, timeout=10)
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    # A page without the expected markup gets empty placeholders rather than
    # raising AttributeError and aborting the whole crawl.
    title_block = soup.find('div', class_='news-title')
    heading = title_block.find('p') if title_block is not None else None
    byline = (
        title_block.find('p', class_="author")
        if title_block is not None
        else None
    )
    title.append(heading.get_text() if heading is not None else "")
    information.append(byline.get_text() if byline is not None else "")

    body = soup.find('div', class_='ke-content')
    passage.append(body.get_text() if body is not None else "")

    # Take image sources from the parsed <img> tags. The previous regex over
    # the serialized HTML also matched inside other attributes ending in
    # 'src="' (such as the "orisrc" attribute the commented-out code below
    # reads), which could list one picture twice, and it returned
    # entity-escaped URLs.
    sources = []
    if body is not None:
        for img in body.find_all('img'):
            # Prefer the regular src; fall back to orisrc for tags that only
            # carry that attribute.
            source = img.get('src') or img.get('orisrc')
            if source:
                sources.append(source)
    imgUrl.extend(sources)
    pagePicNum.append(len(sources))




#     imgArry=content.find_all('p',style="text-align: center;")
#     for i in range(len(imgArry)):
#         imgUrl.append(url+''.join(re.findall(r"orisrc=(.+?)jpg",str(imgArry[i])))[1:]+"jpg")
#         picNum=picNum+1
#     pagePicNum.append(picNum)
#     picNum=0







# for i in range(len(subUrl)):
#     print(subUrl[i])
# for i in range(len(title)):
#      print(title[i])
# print(len(title))
# for i in range(len(information)):
#     print(information[i])
# for i in range(len(passage)):
#     print(passage[i])
# print(len(passage))
# for i in range(len(imgUrl)):
#     print(imgUrl[i])
# for i in range(len(pagePicNum)):
#     print(pagePicNum[i])